# Table_A1.R

# Part of the replication archive for 
#
#   Bullock, John G., and Kelly Rader. 2021. "Response Options and the 
#   Measurement of Political Knowledge." Forthcoming in the British Journal 
#   of Political Science.


library(Bullock)  # for latexTable(), latexTablePDF(), lNA(), qw()
library(car)      # for Recode()
library(dplyr)    # for %>%, filter(), mutate(), rename(), select(), etc.
library(here)     # for here::here()
library(readr)    # for read_csv()
library(tibble)   # for rownames_to_column()

# Read one CSV file per survey.  here::here() builds each path from the
# project root, so the script does not depend on the working directory.
ANES2000 <- read_csv(file = here::here("data/ANES/ANES_2000.csv"))
ANES2008 <- read_csv(file = here::here("data/ANES/ANES_2008.csv"))
ANES2012 <- read_csv(file = here::here("data/ANES/ANES_2012.csv"))
EGSS3    <- read_csv(file = here::here("data/ANES/EGSS_3.csv"))



# **************************************************************************
# LOAD RECONCILED CODING OF THE OPEN-ENDED RESPONSES
# **************************************************************************
# Each set of open-ended responses was coded by at least two coders.  Bullock
# or Rader then "reconciled" the codings into a single standard version, which
# is the version that we analyze.
#
# The only change made here is to the column names: RECONCILIATION becomes
# `reconciliation` in every data set, and the 2000 ANES column `Response`
# becomes `response`.
ANES2000_r <- rename(
  ANES2000,
  reconciliation = RECONCILIATION,
  response       = Response)
ANES2008_r <- rename(ANES2008, reconciliation = RECONCILIATION)
ANES2012_r <- rename(ANES2012, reconciliation = RECONCILIATION)
EGSS3_r    <- rename(EGSS3,    reconciliation = RECONCILIATION)



# **************************************************************************
# CLEAN THE DATA A LITTLE ####
# **************************************************************************
# Treat "<RF>" responses as "Don't know."  Only two responses are affected,
# and only in the 2008 ANES.
#
# First show every 2008 response that contains "<RF>" so that the affected
# rows can be inspected; then recode the rows whose response is exactly "<RF>".
print(filter(ANES2008_r, grepl('<RF>', response)), n = 100)
ANES2008_r$reconciliation[which(ANES2008_r$response == '<RF>')] <- "Don't know"



# **************************************************************************
# ORGANIZE THE RECONCILED CODING OF THE OPEN-ENDED RESPONSES ####
# **************************************************************************
# The fourteen coding categories, listed in the order of the coding scheme.
# They serve as the levels of the ordered factors created below, so any
# reconciled code that is not one of these exact strings becomes NA.
RobertsLevels <- c(
  'Mentions Chief Justice and United States',
  'Mentions Chief Justice and Supreme Court',
  'Mentions Chief Justice and something like Supreme Court',
  'Mentions Supreme Court and something like Chief Justice',
  'Mentions Supreme Court and "justice"',
  'Mentions "Chief Justice" or "Chief of Justice"',
  'Mentions Supreme Court',
  'Mentions "justice"',
  'Mentions "judge" or "court"',
  'Mentions news media or journalism profession',
  'Mentions government, politics, or policy',
  'Mentions other job or office',
  "Don't know",
  'Uninterpretable')

# Convert the reconciled coding in every survey to an ordered factor.
ANES2000_r <- mutate(
  ANES2000_r,
  reconciliation = factor(reconciliation, levels = RobertsLevels, ordered = TRUE))
ANES2008_r <- mutate(
  ANES2008_r,
  reconciliation = factor(reconciliation, levels = RobertsLevels, ordered = TRUE))
ANES2012_r <- mutate(
  ANES2012_r,
  reconciliation = factor(reconciliation, levels = RobertsLevels, ordered = TRUE))
EGSS3_r <- mutate(
  EGSS3_r,
  reconciliation = factor(reconciliation, levels = RobertsLevels, ordered = TRUE))



# **************************************************************************
# MAKE LATEX TABLE: FOURTEEN-CATEGORY CLASSIFICATION OF RESPONSES IN THE 
# 2008 ANES
# **************************************************************************
# Count the 2008 ANES responses in each coding category and express each count
# as a percentage of all responses that have a reconciled code.
#   * `.drop = FALSE` keeps a row for every factor level, including levels
#     that no response was assigned to.
#   * Responses without a reconciled code (NA) are removed before the
#     percentages are computed.  They are detected with is.na(); comparing the
#     factor to the string '<NA>' removes the same rows, but only as a side
#     effect of NA comparisons evaluating to NA, which filter() discards.
#   * `category` holds the row number as a string, i.e., the position of the
#     category in the ordering of the factor levels.
categories2008 <- ANES2008_r %>%
  select(reconciliation) %>%
  group_by(reconciliation, .drop = FALSE) %>%
  count() %>%
  filter(!is.na(reconciliation)) %>%
  ungroup() %>%
  mutate(percentage = round(n / sum(n) * 100, 1)) %>%
  rownames_to_column(var = "category") %>%
  remove_rownames()


# COLUMN NAMES, ROW NAMES, AND CAPTION
# Two vectors of column-name text are passed to latexTable(), presumably one
# per header line.
categories2008_colNames <- list(
  c('category', 'number',       'percentage'),
  c('number',   'of responses', 'of responses'))

# The row names are the category labels with straight double quotes turned
# into LaTeX smart ("curly") quotes.  The inner gsub() changes every " to ``;
# the outer gsub() then changes any `` that directly follows a word character
# into the closing form ''.
categories2008_rowNames <- gsub(
  '(\\w)``',
  '\\1\'\'',
  gsub('\\"', '``', as.character(categories2008$reconciliation)))

# The caption reports N, the total number of tabulated responses.
categories2008_caption <- paste0(
  '\\textit{Open-ended responses in the 2008 ANES.}  Cell entries are frequencies and percentages of responses to the ``Chief Justice\'\' office-recognition question in the 2008 ANES.  N~=~',
  sum(categories2008$n),
  '.  Each response was placed in exactly one category.  Categories are ordered, such that responses were placed in larger-numbered categories only if they could not be placed in smaller-numbered categories.  See \\autopageref{AppendixPassageOpenEndedCodingSchemeOrdering} for details.')


# MAKE LATEX TABLE
# The table body consists of the `category`, `n`, and `percentage` columns.
# The category labels themselves are left out of the matrix because they are
# supplied separately as the row names.  Because `category` is a character
# column, as.matrix() returns a character matrix.
categories2008Latex <- latexTable(
  mat                = as.matrix(select(categories2008, -reconciliation)),
  SE_table           = FALSE,
  formatNumbers      = FALSE,
  rowNames           = categories2008_rowNames,
  colNames           = categories2008_colNames,
  horizOffset        = '-.30in',
  spacerColumns      = c(0, 1, 2),
  spacerColumnsWidth = ".6em",
  headerFooter       = TRUE,
  footerRows         = NULL,
  caption            = categories2008_caption,
  captionMargins     = qw(".50in .25in"),
  commandName        = 'TabANESOpenEndedCodes',
  callCommand        = FALSE)


# ADJUST HORIZONTAL ALIGNMENT OF NUMBERS IN EACH COLUMN (VIA NUMPRINT CODES)
# Locate the lines of the generated LaTeX that contain an N{2}{2} column
# specification.  The first such line is changed to N{4}{0} and the second to
# N{5}{2}.
# NOTE(review): the last statement looks for N{4}{2}, which neither of the
# two substitutions above produces.  It has an effect only if latexTable()
# itself emitted that specification -- confirm whether it is still needed.
rowNums <- grep('N\\{2\\}\\{2\\}', categories2008Latex)
categories2008Latex[rowNums[1]] <- sub(
  'N\\{2\\}\\{2\\}',
  'N{4}{0}',
  categories2008Latex[rowNums[1]])
categories2008Latex[rowNums[2]] <- sub(
  'N\\{2\\}\\{2\\}',
  'N{5}{2}',
  categories2008Latex[rowNums[2]])
categories2008Latex <- sub('N\\{4\\}\\{2\\}', 'N{6}{1}', categories2008Latex)



# **************************************************************************
# TABULATE "DEGREES OF CORRECTNESS" ####
# **************************************************************************
# Map from detailed coding category (the names) to degree of correctness (the
# values).  Categories that do not appear here are not relabeled by this map.
correctnessRecodes <- c(
  # Correct
  "Mentions Chief Justice and United States" =
    "Correct",
  "Mentions Chief Justice and Supreme Court" =
    "Correct",
  # Nearly correct
  "Mentions Chief Justice and something like Supreme Court" =
    "Nearly correct and mentions Supreme Court",
  "Mentions Supreme Court and something like Chief Justice" =
    "Nearly correct and mentions Supreme Court",
  'Mentions Supreme Court and "justice"' =
    "Nearly correct and mentions Supreme Court",
  'Mentions "Chief Justice" or "Chief of Justice"' =
    "Nearly correct and doesn't mention Supreme Court",
  'Mentions Supreme Court' =
    "Nearly correct and mentions Supreme Court",
  # Incorrect
  'Mentions "justice"' =
    "Incorrect",
  'Mentions "judge" or "court"' =
    "Incorrect",
  'Mentions government, politics, or policy' =
    "Incorrect",
  'Mentions other job or office' =
    "Incorrect")

# Collapse the detailed coding categories into degrees of correctness and
# return the share of responses at each degree.
#
# Argument:
#   reconciledCoding   the reconciled codes for one survey (the
#                      `reconciliation` column).
# Value:
#   A one-dimensional table of proportions, rounded to three decimal places.
#
# Steps:
#   1. Relabel categories according to `correctnessRecodes`.  Categories that
#      are not named there keep their original labels.
#   2. Set the news-media and "Uninterpretable" categories to NA.  table()
#      leaves NA out by default, so these responses do not enter the
#      denominator of the proportions.
#   3. Drop factor levels that are no longer used, then tabulate.
tabulateCorrectness <- function (reconciledCoding) {
  recoded <- dplyr::recode(reconciledCoding, !!!correctnessRecodes)
  recoded <- na_if(recoded, 'Mentions news media or journalism profession')
  recoded <- na_if(recoded, 'Uninterpretable')
  recoded %>%
    droplevels() %>%
    table() %>%
    prop.table() %>%
    round(3)
}

# Assemble one column of proportions per survey.  The 2000 ANES result is
# passed as a table, which data.frame() expands into two columns: the category
# labels and the proportions.  The other surveys contribute bare numbers, so
# the rows line up across surveys only if every survey yields the same
# categories in the same order.
correctnessDF <- data.frame(
  ANES2000 = tabulateCorrectness(ANES2000_r$reconciliation),
  ANES2008 = as.numeric(tabulateCorrectness(ANES2008_r$reconciliation)),
  EGSS3    = as.numeric(tabulateCorrectness(EGSS3_r$reconciliation)),
  ANES2012 = as.numeric(tabulateCorrectness(ANES2012_r$reconciliation)))

# The label column gets an empty name; the 2000 proportions get the survey name.
names(correctnessDF)[1:2] <- c('', 'ANES2000')



# **************************************************************************
# MAKE LATEX TABLE FOR DEGREES OF CORRECTNESS ####
# **************************************************************************
# COLUMN NAMES, ROW NAMES, CAPTION, AND FOOTER ROW
# Two vectors of column-name text: survey names, then survey years.
correctnessDF_colNames <- list(
  qw("ANES ANES EGSS ANES"),
  qw("2000 2008 2011 2012"))

# The row names are the degree-of-correctness labels in the first column.
correctnessDF_rowNames <- as.character(correctnessDF[[1]])

correctnessDF_caption <- '\\textit{Open-ended responses to the Chief Justice question by degree of correctness.}  Cell entries are percentages of responses to the ``Chief Justice\'\' office-recognition question.'

# The footer is a label followed by one lNA() result per survey, in the same
# order as the table columns.
# NOTE(review): lNA() is applied to the complete `reconciliation` column,
# whereas tabulateCorrectness() sets two categories to NA before computing
# the shares -- confirm that the footer is meant to include those responses.
correctnessDF_footerRow <- c(
  'Number of observations',
  sapply(
    list(
      ANES2000_r$reconciliation,
      ANES2008_r$reconciliation,
      EGSS3_r$reconciliation,
      ANES2012_r$reconciliation),
    lNA))
  

# CONVERT PROPORTIONS TO PERCENTAGES
# Drop the label column (its contents are already stored as the row names)
# and multiply the proportions by 100.
#
# The columns stay numeric; no string conversion takes place here.  A final
# `mutate_all(as.character())` step is deliberately not included: written that
# way, as.character() is *called* with no argument rather than passed as a
# function, so it hands mutate_all() an empty character vector and converts
# nothing.  Leaving the step out keeps the matrix given to latexTable() -- and
# therefore the generated LaTeX that later code patches with regular
# expressions -- exactly as it was.
correctnessDF <- correctnessDF %>%
  select(-1) %>%                           # remove the rownames
  mutate_all(.funs = list(~. * 100))       # multiply by 100 to get percentages


# MAKE LATEX TABLE
# Arguments are grouped by purpose; all are passed by name.
correctnessDF_Latex <- latexTable(
  # Table body
  mat                = as.matrix(correctnessDF),
  SE_table           = FALSE,
  formatNumbers      = FALSE,
  # decimalPlaces      = 1,

  # Row and column labels, footer
  rowNames           = correctnessDF_rowNames,
  colNames           = correctnessDF_colNames,
  headerFooter       = TRUE,
  footerRows         = correctnessDF_footerRow,

  # Layout
  # hspace             = '-.30in',
  spacerColumns      = 0:3,
  spacerColumnsWidth = ".6em",

  # Caption and LaTeX command
  caption            = correctnessDF_caption,
  captionMargins     = qw(".25in .125in"),
  commandName        = 'TabOpenEndedCorrectness',
  callCommand        = FALSE)

# ADD A TRAILING 0
# Change "9" to "9.0"
# The pattern matches a cell that holds a single digit, preceded by "&&" and
# four spaces and followed by " &&".  Because sub() is used, only the first
# such cell on each line is rewritten.
# NOTE(review): this looks tailored to the whole-number percentage(s) in the
# current data.  A second match on the same line, a two-digit whole number, or
# a cell that is not followed by "&&" would be left unchanged -- re-check if
# the data change.
correctnessDF_Latex <- sub('&&    (\\d) &&', '&&  \\1.0 &&', correctnessDF_Latex)

# ADJUST SPACING OF NUMBERS IN THE COLUMNS
# Every N{2}{2} column specification becomes N{3}{1}.
correctnessDF_Latex <- gsub('N\\{2\\}\\{2\\}', 'N{3}{1}', correctnessDF_Latex)

# CORRECT BUG IN THE FOOTER ROW
# latexTable() inserts a "\tabularnewline" where it should instead insert an 
# ampersand. It does this because it thinks that each entry in the footer row
# must span two columns. We now correct this problem.
#
# Step 1: on every line that contains a two-column \multicolumn cell with a
# centered, all-digit entry, change the span from 2 to 1 and replace whatever
# follows the cell on that line with " &&".
# correctnessDF_Latex <- gsub('\\multicolumn\\{2\\}', '\\multicolumn{1}', correctnessDF_Latex)
correctnessDF_Latex <- gsub(
  pattern     = '(\\multicolumn\\{)2(\\}\\{c\\}\\{\\d+\\}).*', 
  replacement = '\\11\\2 &&', 
  x           = correctnessDF_Latex)
# Step 2: Step 1 also put " &&" after the last footer entry, where the row
# should end instead.  On the line just before the one that contains
# "bottomrule", replace the first " &..." run with "\tabularnewline".
# NOTE(review): this assumes that the last footer entry sits on the line
# immediately before "bottomrule" -- confirm against the latexTable() output.
indForCorrection <- grep('bottomrule', correctnessDF_Latex) - 1
correctnessDF_Latex[indForCorrection] <- sub(
  pattern     = ' &+', 
  replacement = '\\\\tabularnewline', 
  x           = correctnessDF_Latex[indForCorrection])



# **************************************************************************
# SAVE LATEX FILE ####
# **************************************************************************
# Write both tables to one .tex file: the saved file contains Tables A1 and A2.
# Only the .tex file is requested (writeTex = TRUE); PDF output and opening a
# PDF on exit are both switched off.
latexTablePDF(
  latexTable         = list(categories2008Latex, correctnessDF_Latex),
  firstPageEmpty     = FALSE,
  # FALSE if inserting LaTeX tables into appendix of my paper
  container          = FALSE,
  outputFilenameStem = here::here("float_output/Tables_A1-A2"),
  overwriteExisting  = TRUE,
  writeTex           = TRUE,
  writePDF           = FALSE,
  openPDFOnExit      = FALSE)
